# imports
import panel as pn
pn.extension('plotly')
import plotly.express as px
import pandas as pd
import hvplot.pandas
import matplotlib.pyplot as plt
import os
from pathlib import Path
from dotenv import load_dotenv
# Read the Mapbox API key
load_dotenv()
map_box_api = os.getenv("mapbox")
# Read the census data into a Pandas DataFrame
file_path = Path("toronto_neighbourhoods_census_data.csv")
to_data = pd.read_csv(file_path, index_col="year")
to_data.head()
| neighbourhood | single_detached_house | apartment_five_storeys_plus | movable_dwelling | semi_detached_house | row_house | duplex | apartment_five_storeys_less | other_house | average_house_value | shelter_costs_owned | shelter_costs_rented | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| year | ||||||||||||
| 2001 | Agincourt North | 3715 | 1480 | 0 | 1055 | 1295 | 195 | 185 | 5 | 200388 | 810 | 870 |
| 2001 | Agincourt South-Malvern West | 3250 | 1835 | 0 | 545 | 455 | 105 | 425 | 0 | 203047 | 806 | 892 |
| 2001 | Alderwood | 3175 | 315 | 0 | 470 | 50 | 185 | 370 | 0 | 259998 | 817 | 924 |
| 2001 | Annex | 1060 | 6090 | 5 | 1980 | 605 | 275 | 3710 | 165 | 453850 | 1027 | 1378 |
| 2001 | Banbury-Don Mills | 3615 | 4465 | 0 | 240 | 380 | 15 | 1360 | 0 | 371864 | 1007 | 1163 |
# Calculate the sum number of dwelling types units per year (hint: use groupby)
to_data_mod= to_data.drop(columns=['average_house_value','shelter_costs_owned','shelter_costs_rented'])
to_sum_dwellings = to_data_mod.groupby(['year']).sum()
to_sum_dwellings.head()
| single_detached_house | apartment_five_storeys_plus | movable_dwelling | semi_detached_house | row_house | duplex | apartment_five_storeys_less | other_house | |
|---|---|---|---|---|---|---|---|---|
| year | ||||||||
| 2001 | 300930 | 355015 | 75 | 90995 | 52355 | 23785 | 116900 | 3040 |
| 2006 | 266860 | 379400 | 165 | 69430 | 54690 | 44095 | 162850 | 1335 |
| 2011 | 274940 | 429220 | 100 | 72480 | 60355 | 44750 | 163895 | 2165 |
| 2016 | 269680 | 493270 | 95 | 71200 | 61565 | 48585 | 165575 | 2845 |
# Save the dataframe as a csv file
to_sum_dwellings.to_csv('to_sum_dwellings.csv')
# Helper create_bar_chart function
def create_bar_chart(data, title, xlabel, ylabel, color):
"""
Create a barplot based in the data argument.
"""
fig = plt.figure()
bar_chart = data.plot.bar(color=color)
bar_chart.set_xlabel(xlabel)
bar_chart.set_ylabel(ylabel)
bar_chart.set_title(title)
plt.show()
plt.close(fig)
# Create a bar chart per year to show the number of dwelling types
# Bar chart for 2001
create_bar_chart(to_sum_dwellings.loc[2001], "Dwelling Types in Toronto in 2001", "2001", "Dwelling Types Units", "red")
# Bar chart for 2006
create_bar_chart(to_sum_dwellings.loc[2006], "Dwelling Types in Toronto in 2006", "2006", "Dwelling Types Units", "blue")
# Bar chart for 2011
create_bar_chart(to_sum_dwellings.loc[2011], "Dwelling Types in Toronto in 2011", "2011", "Dwelling Types Units", "orange")
# Bar chart for 2016
create_bar_chart(to_sum_dwellings.loc[2016], "Dwelling Types in Toronto in 2016", "2016", "Dwelling Types Units", "magenta")
# Calculate the average monthly shelter costs for owned and rented dwellings
avg_monthly_costs =to_data[['shelter_costs_owned', 'shelter_costs_rented']].groupby(['year']).mean()
avg_monthly_costs
| shelter_costs_owned | shelter_costs_rented | |
|---|---|---|
| year | ||
| 2001 | 846.878571 | 1085.935714 |
| 2006 | 1316.800000 | 925.414286 |
| 2011 | 1448.214286 | 1019.792857 |
| 2016 | 1761.314286 | 1256.321429 |
# Helper create_line_chart function
def create_line_chart(data, title, xlabel, ylabel, color):
"""
Create a line chart based in the data argument.
"""
fig = plt.figure()
line_chart = data.plot.line(color=color)
line_chart.set_xlabel(xlabel)
line_chart.set_ylabel(ylabel)
line_chart.set_title(title)
plt.show()
plt.close(fig)
# Create two line charts, one to plot the monthly shelter costs for owned dwelleing and other for rented dwellings per year
# Line chart for owned dwellings
create_line_chart(avg_monthly_costs['shelter_costs_owned'], 'Average Monthly Shelter Costs for Owned Dwellings in Toronto','Year', 'Average Monthly Shelter Costs', 'blue')
# Line chart for rented dwellings
create_line_chart(avg_monthly_costs['shelter_costs_rented'], 'Average Monthly Shelter Costs for Owned Dwellings in Toronto','Year', 'Average Monthly Shelter Costs', 'orange')
# Calculate the average house value per year
average_house_value = to_data['average_house_value'].groupby('year').mean()
average_house_value
year 2001 289882.885714 2006 424059.664286 2011 530424.721429 2016 664068.328571 Name: average_house_value, dtype: float64
# Plot the average house value per year as a line chart
create_line_chart(average_house_value, 'Average House Value in Toronto', 'Year', 'Avg. House Value', 'blue')
# Create a new DataFrame with the mean house values by neighbourhood per year
avg_value_by_neighbourhood = to_data.filter(['year','neighbourhood','average_house_value'], axis=1)
avg_value_by_neighbourhood.reset_index(inplace=True)
avg_value_by_neighbourhood.head(10)
| year | neighbourhood | average_house_value | |
|---|---|---|---|
| 0 | 2001 | Agincourt North | 200388 |
| 1 | 2001 | Agincourt South-Malvern West | 203047 |
| 2 | 2001 | Alderwood | 259998 |
| 3 | 2001 | Annex | 453850 |
| 4 | 2001 | Banbury-Don Mills | 371864 |
| 5 | 2001 | Bathurst Manor | 304749 |
| 6 | 2001 | Bay Street Corridor | 257404 |
| 7 | 2001 | Bayview Village | 327644 |
| 8 | 2001 | Bayview Woods-Steeles | 343535 |
| 9 | 2001 | Bedford Park-Nortown | 565304 |
# Use hvplot to create an interactive line chart of the average house value per neighbourhood
# The plot should have a dropdown selector for the neighbourhood
avg_value_by_neighbourhood.rename(columns = {'year':'Year', 'average_house_value':'Avg. House Value'}, inplace = True)
avg_value_by_neighbourhood.hvplot.line(x='Year', y='Avg. House Value', groupby='neighbourhood')
# Fetch the data of all dwelling types per year
all_dwelling_types = to_data
all_dwelling_types.reset_index(inplace=True)
all_dwelling_types.head(10)
| year | neighbourhood | single_detached_house | apartment_five_storeys_plus | movable_dwelling | semi_detached_house | row_house | duplex | apartment_five_storeys_less | other_house | average_house_value | shelter_costs_owned | shelter_costs_rented | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2001 | Agincourt North | 3715 | 1480 | 0 | 1055 | 1295 | 195 | 185 | 5 | 200388 | 810 | 870 |
| 1 | 2001 | Agincourt South-Malvern West | 3250 | 1835 | 0 | 545 | 455 | 105 | 425 | 0 | 203047 | 806 | 892 |
| 2 | 2001 | Alderwood | 3175 | 315 | 0 | 470 | 50 | 185 | 370 | 0 | 259998 | 817 | 924 |
| 3 | 2001 | Annex | 1060 | 6090 | 5 | 1980 | 605 | 275 | 3710 | 165 | 453850 | 1027 | 1378 |
| 4 | 2001 | Banbury-Don Mills | 3615 | 4465 | 0 | 240 | 380 | 15 | 1360 | 0 | 371864 | 1007 | 1163 |
| 5 | 2001 | Bathurst Manor | 2405 | 1550 | 0 | 130 | 130 | 375 | 745 | 0 | 304749 | 843 | 1052 |
| 6 | 2001 | Bay Street Corridor | 10 | 7575 | 0 | 0 | 15 | 0 | 240 | 0 | 257404 | 1218 | 1142 |
| 7 | 2001 | Bayview Village | 2170 | 630 | 0 | 170 | 765 | 15 | 640 | 0 | 327644 | 1197 | 1164 |
| 8 | 2001 | Bayview Woods-Steeles | 1650 | 1715 | 0 | 925 | 105 | 10 | 170 | 5 | 343535 | 1212 | 1018 |
| 9 | 2001 | Bedford Park-Nortown | 4985 | 2080 | 0 | 45 | 40 | 210 | 1235 | 15 | 565304 | 933 | 1491 |
# Use hvplot to create an interactive bar chart of the number of dwelling types per neighbourhood
# The plot should have a dropdown selector for the neighbourhood
all_dwelling_types.rename(columns = {'year':'Year'}, inplace = True)
all_dwelling_types.hvplot.bar(x='Year', ylabel= "Dwelling Type Units", groupby='neighbourhood', rot=90,ylim=(0,4000), yticks=5, height=400)
# Getting the data from the top 10 expensive neighbourhoods
mean_values_neighbourhood = all_dwelling_types.groupby('neighbourhood').mean()[['single_detached_house', 'apartment_five_storeys_plus','movable_dwelling','semi_detached_house','row_house','duplex','apartment_five_storeys_less','other_house','average_house_value','shelter_costs_owned','shelter_costs_rented']]
mean_values_neighbourhood= mean_values_neighbourhood.sort_values(by='average_house_value', ascending=False)
top_ten_neighbourhoods= mean_values_neighbourhood.head(10)
top_ten_neighbourhoods.reset_index(inplace=True)
top_ten_neighbourhoods
| neighbourhood | single_detached_house | apartment_five_storeys_plus | movable_dwelling | semi_detached_house | row_house | duplex | apartment_five_storeys_less | other_house | average_house_value | shelter_costs_owned | shelter_costs_rented | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Bridle Path-Sunnybrook-York Mills | 2260.00 | 331.25 | 0.00 | 36.25 | 90.00 | 25.0 | 40.00 | 0.00 | 1526485.75 | 2360.75 | 2321.75 |
| 1 | Forest Hill South | 1742.50 | 2031.25 | 1.25 | 61.25 | 45.00 | 75.0 | 1027.50 | 3.75 | 1195992.50 | 1781.00 | 1313.75 |
| 2 | Lawrence Park South | 3472.50 | 773.75 | 0.00 | 126.25 | 38.75 | 225.0 | 966.25 | 16.25 | 1094027.75 | 1954.00 | 1372.75 |
| 3 | Rosedale-Moore Park | 2498.75 | 4641.25 | 0.00 | 486.25 | 245.00 | 327.5 | 1618.75 | 2.50 | 1093640.00 | 1909.75 | 1537.25 |
| 4 | St.Andrew-Windfields | 3225.00 | 1670.00 | 0.00 | 185.00 | 552.50 | 97.5 | 586.25 | 5.00 | 999107.00 | 1880.25 | 1384.50 |
| 5 | Casa Loma | 916.25 | 2310.00 | 0.00 | 288.75 | 201.25 | 162.5 | 1192.50 | 2.50 | 981064.25 | 1873.75 | 1547.75 |
| 6 | Bedford Park-Nortown | 4865.00 | 1981.25 | 0.00 | 43.75 | 57.50 | 287.5 | 1275.00 | 88.75 | 930415.25 | 1786.75 | 1255.00 |
| 7 | Forest Hill North | 1488.75 | 3392.50 | 0.00 | 12.50 | 16.25 | 82.5 | 402.50 | 1.25 | 851680.50 | 1722.75 | 1245.50 |
| 8 | Kingsway South | 2326.25 | 576.25 | 0.00 | 66.25 | 48.75 | 20.0 | 336.25 | 2.50 | 843234.25 | 1736.75 | 1622.00 |
| 9 | Yonge-St.Clair | 565.00 | 3948.75 | 0.00 | 425.00 | 212.50 | 172.5 | 1308.75 | 6.25 | 813220.25 | 1680.75 | 1369.00 |
# Plotting the data from the top 10 expensive neighbourhoods
top_ten_neighbourhoods.rename(columns = {'neighbourhood':'Neighbourhood', 'average_house_value':'Avg. House Value'},inplace=True)
top_ten_neighbourhoods.hvplot.bar(x='Neighbourhood', y= "Avg. House Value", rot=90, height=500)
C:\Users\Owner\AppData\Local\Temp\ipykernel_2828\815796198.py:2: SettingWithCopyWarning:
A value is trying to be set on a copy of a slice from a DataFrame
See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
top_ten_neighbourhoods.rename(columns = {'neighbourhood':'Neighbourhood', 'average_house_value':'Avg. House Value'},inplace=True)
# Load neighbourhoods coordinates data
file_path = Path("toronto_neighbourhoods_coordinates.csv")
df_neighbourhood_locations = pd.read_csv(file_path)
df_neighbourhood_locations.head(5)
| neighbourhood | lat | lon | |
|---|---|---|---|
| 0 | Agincourt North | 43.805441 | -79.266712 |
| 1 | Agincourt South-Malvern West | 43.788658 | -79.265612 |
| 2 | Alderwood | 43.604937 | -79.541611 |
| 3 | Annex | 43.671585 | -79.404001 |
| 4 | Banbury-Don Mills | 43.737657 | -79.349718 |
# Calculate the mean values for each neighborhood
avg_values_neighbourhood= all_dwelling_types.groupby('neighbourhood').mean()[['single_detached_house', 'apartment_five_storeys_plus','movable_dwelling','semi_detached_house','row_house','duplex','apartment_five_storeys_less','other_house','average_house_value','shelter_costs_owned','shelter_costs_rented']]
avg_values_neighbourhood.reset_index(inplace=True)
avg_values_neighbourhood.head(5)
| neighbourhood | single_detached_house | apartment_five_storeys_plus | movable_dwelling | semi_detached_house | row_house | duplex | apartment_five_storeys_less | other_house | average_house_value | shelter_costs_owned | shelter_costs_rented | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Agincourt North | 3435.00 | 1947.50 | 2.50 | 863.75 | 1406.25 | 512.50 | 547.50 | 10.00 | 329811.5 | 1109.00 | 983.50 |
| 1 | Agincourt South-Malvern West | 2897.50 | 2180.00 | 1.25 | 375.00 | 456.25 | 523.75 | 628.75 | 32.50 | 334189.0 | 1131.25 | 985.00 |
| 2 | Alderwood | 2903.75 | 302.50 | 1.25 | 503.75 | 76.25 | 302.50 | 502.50 | 1.25 | 427922.5 | 1166.75 | 1003.25 |
| 3 | Annex | 751.25 | 7235.00 | 1.25 | 1375.00 | 613.75 | 355.00 | 4605.00 | 83.75 | 746977.0 | 1692.75 | 1315.25 |
| 4 | Banbury-Don Mills | 3572.50 | 5388.75 | 1.25 | 273.75 | 626.25 | 32.50 | 1340.00 | 0.00 | 612039.0 | 1463.50 | 1242.75 |
# Join the average values with the neighbourhood locations
df_combined = pd.merge(df_neighbourhood_locations, avg_values_neighbourhood, on="neighbourhood")
df_combined.head(5)
| neighbourhood | lat | lon | single_detached_house | apartment_five_storeys_plus | movable_dwelling | semi_detached_house | row_house | duplex | apartment_five_storeys_less | other_house | average_house_value | shelter_costs_owned | shelter_costs_rented | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Agincourt North | 43.805441 | -79.266712 | 3435.00 | 1947.50 | 2.50 | 863.75 | 1406.25 | 512.50 | 547.50 | 10.00 | 329811.5 | 1109.00 | 983.50 |
| 1 | Agincourt South-Malvern West | 43.788658 | -79.265612 | 2897.50 | 2180.00 | 1.25 | 375.00 | 456.25 | 523.75 | 628.75 | 32.50 | 334189.0 | 1131.25 | 985.00 |
| 2 | Alderwood | 43.604937 | -79.541611 | 2903.75 | 302.50 | 1.25 | 503.75 | 76.25 | 302.50 | 502.50 | 1.25 | 427922.5 | 1166.75 | 1003.25 |
| 3 | Annex | 43.671585 | -79.404001 | 751.25 | 7235.00 | 1.25 | 1375.00 | 613.75 | 355.00 | 4605.00 | 83.75 | 746977.0 | 1692.75 | 1315.25 |
| 4 | Banbury-Don Mills | 43.737657 | -79.349718 | 3572.50 | 5388.75 | 1.25 | 273.75 | 626.25 | 32.50 | 1340.00 | 0.00 | 612039.0 | 1463.50 | 1242.75 |
# Create a scatter mapbox to analyze neighbourhood info
px.set_mapbox_access_token(map_box_api)
map_plot = px.scatter_mapbox(
df_combined,
lat='lat',
lon='lon',
size="average_house_value",
color="average_house_value",
color_continuous_scale=px.colors.cyclical.IceFire,
size_max=15,
zoom=9,
hover_name="neighbourhood",
hover_data= ["shelter_costs_owned",
"shelter_costs_rented",
"single_detached_house",
"apartment_five_storeys_plus",
"movable_dwelling",
"semi_detached_house",
"row_house",
"duplex",
"apartment_five_storeys_less",
"other_house"],
height=600
)
# Display the map
map_plot.show()
# Create a bar chart row facet to plot the average house values for all Toronto's neighbourhoods per year
fig = px.bar(
avg_value_by_neighbourhood,
x="neighbourhood",
y="Avg. House Value",
facet_row="Year",
color="Avg. House Value",
color_continuous_scale=px.colors.cyclical.IceFire,
height=900, width=950,
title="Average House Values in Toronto Per Neighbourhood"
)
fig.show()